{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "DQfD_main.ipynb",
      "provenance": [],
      "mount_file_id": "https://github.com/Kokkini/DQfD/blob/master/DQfD_main.ipynb",
      "authorship_tag": "ABX9TyNN3C/WRwxcPxsVtMwSkHeP",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/Kokkini/DQfD/blob/master/DQfD_main.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "nLKIVoRu16Fy",
        "colab_type": "code",
        "outputId": "d781b081-e920-439d-e962-7a8bc6370554",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 288
        }
      },
      "source": [
        "%tensorflow_version 2.x\n",
        "# !pip install stable-baselines[mpi]==2.10.0\n",
        "# Use %pip rather than !pip so the packages are installed into the running kernel's environment.\n",
        "# Versions are pinned to the ones reported in this cell's recorded output.\n",
        "%pip install gym==0.17.2 pynput==1.6.8"
      ],
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Requirement already satisfied: gym in /usr/local/lib/python3.6/dist-packages (0.17.2)\n",
            "Requirement already satisfied: pyglet<=1.5.0,>=1.4.0 in /usr/local/lib/python3.6/dist-packages (from gym) (1.5.0)\n",
            "Requirement already satisfied: numpy>=1.10.4 in /usr/local/lib/python3.6/dist-packages (from gym) (1.18.4)\n",
            "Requirement already satisfied: cloudpickle<1.4.0,>=1.2.0 in /usr/local/lib/python3.6/dist-packages (from gym) (1.3.0)\n",
            "Requirement already satisfied: scipy in /usr/local/lib/python3.6/dist-packages (from gym) (1.4.1)\n",
            "Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from pyglet<=1.5.0,>=1.4.0->gym) (0.16.0)\n",
            "Collecting pynput\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/33/0a/ea13c055a90b1aff5945e7eb330584f15e5282aead15a8f3cdb977a1534e/pynput-1.6.8-py2.py3-none-any.whl (91kB)\n",
            "\u001b[K     |████████████████████████████████| 92kB 5.4MB/s \n",
            "\u001b[?25hCollecting python-xlib>=0.17; \"linux\" in sys_platform\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/33/10/2eb938852a9bdf6745808f141c9fede76b1bd5a9530859bacc71985d29d9/python_xlib-0.27-py2.py3-none-any.whl (174kB)\n",
            "\u001b[K     |████████████████████████████████| 184kB 14.3MB/s \n",
            "\u001b[?25hRequirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from pynput) (1.12.0)\n",
            "Installing collected packages: python-xlib, pynput\n",
            "Successfully installed pynput-1.6.8 python-xlib-0.27\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "99IlmkkQ7mcr",
        "colab_type": "code",
        "outputId": "85b49f84-1c02-4eea-edf8-156de0e92565",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 301
        }
      },
      "source": [
        "# Show which GPU (if any) is attached to this runtime before training starts.\n",
        "!nvidia-smi"
      ],
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Sun May 31 19:01:17 2020       \n",
            "+-----------------------------------------------------------------------------+\n",
            "| NVIDIA-SMI 440.82       Driver Version: 418.67       CUDA Version: 10.1     |\n",
            "|-------------------------------+----------------------+----------------------+\n",
            "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
            "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
            "|===============================+======================+======================|\n",
            "|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |\n",
            "| N/A   37C    P0    27W / 250W |      0MiB / 16280MiB |      0%      Default |\n",
            "+-------------------------------+----------------------+----------------------+\n",
            "                                                                               \n",
            "+-----------------------------------------------------------------------------+\n",
            "| Processes:                                                       GPU Memory |\n",
            "|  GPU       PID   Type   Process name                             Usage      |\n",
            "|=============================================================================|\n",
            "|  No running processes found                                                 |\n",
            "+-----------------------------------------------------------------------------+\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "B7VAg5r42H9q",
        "colab_type": "code",
        "outputId": "dbca415b-056e-49dd-d099-6f8afaa1fb1a",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 150
        }
      },
      "source": [
        "import subprocess\n",
        "from getpass import getpass\n",
        "\n",
        "def _run_git(args, secret=\"\"):\n",
        "  \"\"\"Run `git <args>`, print its output with `secret` masked, and return the exit code.\"\"\"\n",
        "  proc = subprocess.run([\"git\"] + list(args), stdout=subprocess.PIPE,\n",
        "                        stderr=subprocess.STDOUT, universal_newlines=True)\n",
        "  text = proc.stdout or \"\"\n",
        "  if secret:\n",
        "    # git may echo the clone URL in its messages; never let the token reach the cell output.\n",
        "    text = text.replace(secret, \"***\")\n",
        "  if text:\n",
        "    print(text, end=\"\")\n",
        "  return proc.returncode\n",
        "\n",
        "def clone_with_token(repo_name, owner_name=\"Kokkini\", user_email=\"trannhatquang1104@gmail.com\", user_name=\"Kokkini\", keep_token_in_remote=False):\n",
        "  \"\"\"Clone https://github.com/<owner_name>/<repo_name>.git with a token typed at a hidden prompt.\n",
        "\n",
        "  Args:\n",
        "    repo_name: repository to clone; also the directory the clone is expected to land in.\n",
        "    owner_name: GitHub account that owns the repository.\n",
        "    user_email: value written to the global git config key user.email.\n",
        "    user_name: value written to the global git config key user.name.\n",
        "    keep_token_in_remote: if True, leave the token embedded in the clone's `origin` URL\n",
        "      (the previous behaviour, useful for unattended `git push`). By default the URL is\n",
        "      rewritten without the token so it is not kept in .git/config or shown by `git remote -v`.\n",
        "\n",
        "  Raises:\n",
        "    RuntimeError: if `git clone` exits with a non-zero status.\n",
        "  \"\"\"\n",
        "  token = getpass('insert token: ').strip()\n",
        "  public_url = f\"https://github.com/{owner_name}/{repo_name}.git\"\n",
        "  # An empty token falls back to an anonymous clone instead of building \"https://@github.com/...\".\n",
        "  clone_url = f\"https://{token}@github.com/{owner_name}/{repo_name}.git\" if token else public_url\n",
        "  # Arguments are passed as a list (no shell), so names and e-mail are never re-parsed by a shell.\n",
        "  _run_git([\"config\", \"--global\", \"user.email\", user_email])\n",
        "  _run_git([\"config\", \"--global\", \"user.name\", user_name])\n",
        "  status = _run_git([\"clone\", clone_url], secret=token)\n",
        "  if status != 0:\n",
        "    raise RuntimeError(f\"git clone of {owner_name}/{repo_name} failed with exit status {status}\")\n",
        "  if token and not keep_token_in_remote:\n",
        "    # Drop the credential from the stored remote URL now that the clone has succeeded.\n",
        "    _run_git([\"-C\", repo_name, \"remote\", \"set-url\", \"origin\", public_url])\n",
        "\n",
        "clone_with_token(\"DQfD\")"
      ],
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "insert token: ··········\n",
            "Cloning into 'DQfD'...\n",
            "remote: Enumerating objects: 152, done.\u001b[K\n",
            "remote: Counting objects: 100% (152/152), done.\u001b[K\n",
            "remote: Compressing objects: 100% (131/131), done.\u001b[K\n",
            "remote: Total 152 (delta 85), reused 50 (delta 18), pack-reused 0\u001b[K\n",
            "Receiving objects: 100% (152/152), 66.46 KiB | 782.00 KiB/s, done.\n",
            "Resolving deltas: 100% (85/85), done.\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "3tBPFxBF55tf",
        "colab_type": "code",
        "outputId": "379ff149-884c-4fef-bae6-2f4d95da1774",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 33
        }
      },
      "source": [
        "# Work from the root of the cloned repository for the remaining cells.\n",
        "%cd DQfD/"
      ],
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "/content/DQfD\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "o1mT261H2VMr",
        "colab_type": "code",
        "outputId": "5937b7e4-8933-407e-c8cd-18b5c2b5f1dc",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        }
      },
      "source": [
        "# Drive folder used for saving/loading the model, the logs and the human demonstration file.\n",
        "RUN_DIR = \"/content/drive/My Drive/Colab Notebooks/imitation_RL\"\n",
        "ENV_ID = \"BreakoutNoFrameskip-v4\"\n",
        "# IPython substitutes {RUN_DIR} and {ENV_ID} before the command is handed to the shell.\n",
        "!python run_atari.py --env={ENV_ID} --seed=0 --save_video_interval=100000 --pre_train_timesteps=80000 --save_path=\"{RUN_DIR}\" --load_path=\"{RUN_DIR}\" --demo_path=\"{RUN_DIR}/human.{ENV_ID}.pkl\" --log_path=\"{RUN_DIR}/logs\""
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "2020-05-31 19:02:05.688015: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1\n",
            "Logging to /content/drive/My Drive/Colab Notebooks/imitation_RL/logs\n",
            "2020-05-31 19:02:08.262291: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1\n",
            "2020-05-31 19:02:08.302672: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
            "2020-05-31 19:02:08.303265: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: \n",
            "pciBusID: 0000:00:04.0 name: Tesla P100-PCIE-16GB computeCapability: 6.0\n",
            "coreClock: 1.3285GHz coreCount: 56 deviceMemorySize: 15.90GiB deviceMemoryBandwidth: 681.88GiB/s\n",
            "2020-05-31 19:02:08.303310: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1\n",
            "2020-05-31 19:02:08.552607: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10\n",
            "2020-05-31 19:02:08.690996: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10\n",
            "2020-05-31 19:02:08.767618: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10\n",
            "2020-05-31 19:02:09.064418: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10\n",
            "2020-05-31 19:02:09.092020: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10\n",
            "2020-05-31 19:02:09.635057: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7\n",
            "2020-05-31 19:02:09.635293: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
            "2020-05-31 19:02:09.635936: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
            "2020-05-31 19:02:09.636460: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1703] Adding visible gpu devices: 0\n",
            "2020-05-31 19:02:09.636983: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX512F\n",
            "2020-05-31 19:02:09.671576: I tensorflow/core/platform/profile_utils/cpu_utils.cc:102] CPU Frequency: 2000170000 Hz\n",
            "2020-05-31 19:02:09.671766: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x1d88f40 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
            "2020-05-31 19:02:09.671796: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version\n",
            "2020-05-31 19:02:09.808684: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
            "2020-05-31 19:02:09.809449: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x1d89100 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
            "2020-05-31 19:02:09.809480: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0\n",
            "2020-05-31 19:02:09.811059: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
            "2020-05-31 19:02:09.811602: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: \n",
            "pciBusID: 0000:00:04.0 name: Tesla P100-PCIE-16GB computeCapability: 6.0\n",
            "coreClock: 1.3285GHz coreCount: 56 deviceMemorySize: 15.90GiB deviceMemoryBandwidth: 681.88GiB/s\n",
            "2020-05-31 19:02:09.811652: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1\n",
            "2020-05-31 19:02:09.811689: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10\n",
            "2020-05-31 19:02:09.811711: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10\n",
            "2020-05-31 19:02:09.811731: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10\n",
            "2020-05-31 19:02:09.811753: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10\n",
            "2020-05-31 19:02:09.811771: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10\n",
            "2020-05-31 19:02:09.811790: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7\n",
            "2020-05-31 19:02:09.811855: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
            "2020-05-31 19:02:09.812429: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
            "2020-05-31 19:02:09.812907: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1703] Adding visible gpu devices: 0\n",
            "2020-05-31 19:02:09.816666: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1\n",
            "2020-05-31 19:02:16.193763: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
            "2020-05-31 19:02:16.193817: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1108]      0 \n",
            "2020-05-31 19:02:16.193835: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 0:   N \n",
            "2020-05-31 19:02:16.200156: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
            "2020-05-31 19:02:16.200796: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
            "2020-05-31 19:02:16.201322: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:39] Overriding allow_growth setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n",
            "2020-05-31 19:02:16.201366: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 14973 MB memory) -> physical GPU (device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0)\n",
            "input shape is (84, 84, 4)\n",
            "input shape is (84, 84, 4)\n",
            "Restoring from None\n",
            "trajectory length: 7904\n",
            "target network update\n",
            "  0% 0/80000 [00:00<?, ?it/s]2020-05-31 19:02:23.689746: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10\n",
            "2020-05-31 19:02:25.155198: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7\n",
            "-----------------------------------------\n",
            "| % time spent exploring  | 1           |\n",
            "| demo sample rate        | 1           |\n",
            "| elapsed time            | 00:00:08    |\n",
            "| episodes                | 0           |\n",
            "| epsilon                 | 0           |\n",
            "| loss_l2                 | 0.020358332 |\n",
            "| loss_margin             | 2.069506    |\n",
            "| loss_n_td               | 0.7611711   |\n",
            "| loss_td                 | 0.8586774   |\n",
            "| losses_all              | 3.7097127   |\n",
            "| max 100 episode reward  | 0           |\n",
            "| mean 100 episode reward | 0           |\n",
            "| min 100 episode reward  | 0           |\n",
            "| pre_train               | True        |\n",
            "| steps                   | 0           |\n",
            "-----------------------------------------\n",
            " 12% 9988/80000 [01:30<09:36, 121.44it/s]-----------------------------------------\n",
            "| % time spent exploring  | 1           |\n",
            "| demo sample rate        | 1           |\n",
            "| elapsed time            | 00:01:31    |\n",
            "| episodes                | 0           |\n",
            "| epsilon                 | 0           |\n",
            "| loss_l2                 | 0.013551723 |\n",
            "| loss_margin             | 0.1449842   |\n",
            "| loss_n_td               | 0.045100257 |\n",
            "| loss_td                 | 0.060130637 |\n",
            "| losses_all              | 0.23164174  |\n",
            "| max 100 episode reward  | 0           |\n",
            "| mean 100 episode reward | 0           |\n",
            "| min 100 episode reward  | 0           |\n",
            "| pre_train               | True        |\n",
            "| steps                   | 10000       |\n",
            "-----------------------------------------\n",
            " 25% 19997/80000 [02:53<08:17, 120.72it/s]-----------------------------------------\n",
            "| % time spent exploring  | 1           |\n",
            "| demo sample rate        | 1           |\n",
            "| elapsed time            | 00:02:54    |\n",
            "| episodes                | 0           |\n",
            "| epsilon                 | 0           |\n",
            "| loss_l2                 | 0.014499397 |\n",
            "| loss_margin             | 0.04270754  |\n",
            "| loss_n_td               | 0.03130194  |\n",
            "| loss_td                 | 0.029267937 |\n",
            "| losses_all              | 0.10908112  |\n",
            "| max 100 episode reward  | 0           |\n",
            "| mean 100 episode reward | 0           |\n",
            "| min 100 episode reward  | 0           |\n",
            "| pre_train               | True        |\n",
            "| steps                   | 20000       |\n",
            "-----------------------------------------\n",
            " 37% 29992/80000 [04:16<06:55, 120.39it/s]-----------------------------------------\n",
            "| % time spent exploring  | 1           |\n",
            "| demo sample rate        | 1           |\n",
            "| elapsed time            | 00:04:17    |\n",
            "| episodes                | 0           |\n",
            "| epsilon                 | 0           |\n",
            "| loss_l2                 | 0.014699174 |\n",
            "| loss_margin             | 0.0         |\n",
            "| loss_n_td               | 0.016216906 |\n",
            "| loss_td                 | 0.009721957 |\n",
            "| losses_all              | 0.03837306  |\n",
            "| max 100 episode reward  | 0           |\n",
            "| mean 100 episode reward | 0           |\n",
            "| min 100 episode reward  | 0           |\n",
            "| pre_train               | True        |\n",
            "| steps                   | 30000       |\n",
            "-----------------------------------------\n",
            " 50% 39993/80000 [05:39<05:36, 118.78it/s]-----------------------------------------\n",
            "| % time spent exploring  | 1           |\n",
            "| demo sample rate        | 1           |\n",
            "| elapsed time            | 00:05:39    |\n",
            "| episodes                | 0           |\n",
            "| epsilon                 | 0           |\n",
            "| loss_l2                 | 0.01453488  |\n",
            "| loss_margin             | 0.09214203  |\n",
            "| loss_n_td               | 0.02602587  |\n",
            "| loss_td                 | 0.016996609 |\n",
            "| losses_all              | 0.1325294   |\n",
            "| max 100 episode reward  | 0           |\n",
            "| mean 100 episode reward | 0           |\n",
            "| min 100 episode reward  | 0           |\n",
            "| pre_train               | True        |\n",
            "| steps                   | 40000       |\n",
            "-----------------------------------------\n",
            " 62% 49994/80000 [07:01<04:06, 121.71it/s]------------------------------------------\n",
            "| % time spent exploring  | 1            |\n",
            "| demo sample rate        | 1            |\n",
            "| elapsed time            | 00:07:02     |\n",
            "| episodes                | 0            |\n",
            "| epsilon                 | 0            |\n",
            "| loss_l2                 | 0.014170906  |\n",
            "| loss_margin             | 0.0024043284 |\n",
            "| loss_n_td               | 0.007243587  |\n",
            "| loss_td                 | 0.006791706  |\n",
            "| losses_all              | 0.029697161  |\n",
            "| max 100 episode reward  | 0            |\n",
            "| mean 100 episode reward | 0            |\n",
            "| min 100 episode reward  | 0            |\n",
            "| pre_train               | True         |\n",
            "| steps                   | 50000        |\n",
            "------------------------------------------\n",
            " 75% 59999/80000 [08:24<02:47, 119.29it/s]------------------------------------------\n",
            "| % time spent exploring  | 1            |\n",
            "| demo sample rate        | 1            |\n",
            "| elapsed time            | 00:08:24     |\n",
            "| episodes                | 0            |\n",
            "| epsilon                 | 0            |\n",
            "| loss_l2                 | 0.013711213  |\n",
            "| loss_margin             | 0.001578738  |\n",
            "| loss_n_td               | 0.0048797335 |\n",
            "| loss_td                 | 0.0053641265 |\n",
            "| losses_all              | 0.02500055   |\n",
            "| max 100 episode reward  | 0            |\n",
            "| mean 100 episode reward | 0            |\n",
            "| min 100 episode reward  | 0            |\n",
            "| pre_train               | True         |\n",
            "| steps                   | 60000        |\n",
            "------------------------------------------\n",
            " 87% 69995/80000 [09:46<01:22, 121.71it/s]------------------------------------------\n",
            "| % time spent exploring  | 1            |\n",
            "| demo sample rate        | 1            |\n",
            "| elapsed time            | 00:09:47     |\n",
            "| episodes                | 0            |\n",
            "| epsilon                 | 0            |\n",
            "| loss_l2                 | 0.013184984  |\n",
            "| loss_margin             | 0.0054628737 |\n",
            "| loss_n_td               | 0.0062004677 |\n",
            "| loss_td                 | 0.0056741987 |\n",
            "| losses_all              | 0.02925535   |\n",
            "| max 100 episode reward  | 0            |\n",
            "| mean 100 episode reward | 0            |\n",
            "| min 100 episode reward  | 0            |\n",
            "| pre_train               | True         |\n",
            "| steps                   | 70000        |\n",
            "------------------------------------------\n",
            "100% 80000/80000 [11:09<00:00, 119.53it/s]\n",
            "  0% 0/1000000 [00:00<?, ?it/s]saved checkpoint\n",
            "  0% 1593/1000000 [06:41<69:39:08,  3.98it/s]saved best model\n",
            "  1% 6962/1000000 [29:13<68:37:06,  4.02it/s]saved best model\n",
            "  2% 21047/1000000 [1:27:33<68:41:35,  3.96it/s]------------------------------------------\n",
            "| % time spent exploring  | 1            |\n",
            "| demo sample rate        | 0.766        |\n",
            "| elapsed time            | 01:38:45     |\n",
            "| episodes                | 10           |\n",
            "| epsilon                 | 0.01         |\n",
            "| loss_l2                 | 0.008230355  |\n",
            "| loss_margin             | 0.0          |\n",
            "| loss_n_td               | 0.007795605  |\n",
            "| loss_td                 | 0.0055720787 |\n",
            "| losses_all              | 0.010699887  |\n",
            "| max 100 episode reward  | 23           |\n",
            "| mean 100 episode reward | 8.6          |\n",
            "| min 100 episode reward  | 1            |\n",
            "| pre_train               | False        |\n",
            "| steps                   | 21047        |\n",
            "------------------------------------------\n",
            "  3% 34302/1000000 [2:23:00<65:39:45,  4.09it/s]saved best model\n",
            "  4% 40718/1000000 [2:49:26<65:15:59,  4.08it/s]------------------------------------------\n",
            "| % time spent exploring  | 1            |\n",
            "| demo sample rate        | 0.654        |\n",
            "| elapsed time            | 03:00:37     |\n",
            "| episodes                | 20           |\n",
            "| epsilon                 | 0.01         |\n",
            "| loss_l2                 | 0.006390909  |\n",
            "| loss_margin             | 0.0          |\n",
            "| loss_n_td               | 0.0028188515 |\n",
            "| loss_td                 | 0.032274984  |\n",
            "| losses_all              | 0.012314454  |\n",
            "| max 100 episode reward  | 62           |\n",
            "| mean 100 episode reward | 13           |\n",
            "| min 100 episode reward  | 1            |\n",
            "| pre_train               | False        |\n",
            "| steps                   | 40718        |\n",
            "------------------------------------------\n",
            "  6% 56460/1000000 [3:54:06<63:45:35,  4.11it/s]------------------------------------------\n",
            "| % time spent exploring  | 1            |\n",
            "| demo sample rate        | 0.592        |\n",
            "| elapsed time            | 04:05:18     |\n",
            "| episodes                | 30           |\n",
            "| epsilon                 | 0.01         |\n",
            "| loss_l2                 | 0.00550894   |\n",
            "| loss_margin             | 0.0010034293 |\n",
            "| loss_n_td               | 0.004341241  |\n",
            "| loss_td                 | 0.00698175   |\n",
            "| losses_all              | 0.008654319  |\n",
            "| max 100 episode reward  | 62           |\n",
            "| mean 100 episode reward | 13.1         |\n",
            "| min 100 episode reward  | 1            |\n",
            "| pre_train               | False        |\n",
            "| steps                   | 56460        |\n",
            "------------------------------------------\n",
            "  7% 68754/1000000 [4:44:34<62:54:19,  4.11it/s]-------------------------------------------\n",
            "| % time spent exploring  | 1             |\n",
            "| demo sample rate        | 0.553         |\n",
            "| elapsed time            | 04:55:46      |\n",
            "| episodes                | 40            |\n",
            "| epsilon                 | 0.01          |\n",
            "| loss_l2                 | 0.005014208   |\n",
            "| loss_margin             | 0.00031442195 |\n",
            "| loss_n_td               | 0.0018470427  |\n",
            "| loss_td                 | 0.0022338894  |\n",
            "| losses_all              | 0.0058714435  |\n",
            "| max 100 episode reward  | 62            |\n",
            "| mean 100 episode reward | 11.7          |\n",
            "| min 100 episode reward  | 1             |\n",
            "| pre_train               | False         |\n",
            "| steps                   | 68754         |\n",
            "-------------------------------------------\n",
            "  9% 87418/1000000 [6:05:10<69:09:21,  3.67it/s]------------------------------------------\n",
            "| % time spent exploring  | 1            |\n",
            "| demo sample rate        | 0.505        |\n",
            "| elapsed time            | 06:16:22     |\n",
            "| episodes                | 50           |\n",
            "| epsilon                 | 0.01         |\n",
            "| loss_l2                 | 0.004473562  |\n",
            "| loss_margin             | 0.0          |\n",
            "| loss_n_td               | 0.002994536  |\n",
            "| loss_td                 | 0.0027686097 |\n",
            "| losses_all              | 0.0058664195 |\n",
            "| max 100 episode reward  | 62           |\n",
            "| mean 100 episode reward | 12.2         |\n",
            "| min 100 episode reward  | 1            |\n",
            "| pre_train               | False        |\n",
            "| steps                   | 87418        |\n",
            "------------------------------------------\n",
            " 10% 100000/1000000 [7:01:00<66:22:41,  3.77it/s]saved checkpoint\n",
            " 11% 105436/1000000 [7:25:07<64:37:54,  3.84it/s]------------------------------------------\n",
            "| % time spent exploring  | 1            |\n",
            "| demo sample rate        | 0.467        |\n",
            "| elapsed time            | 07:36:19     |\n",
            "| episodes                | 60           |\n",
            "| epsilon                 | 0.01         |\n",
            "| loss_l2                 | 0.004119204  |\n",
            "| loss_margin             | 0.01922749   |\n",
            "| loss_n_td               | 0.0030735792 |\n",
            "| loss_td                 | 0.0030135538 |\n",
            "| losses_all              | 0.008957932  |\n",
            "| max 100 episode reward  | 62           |\n",
            "| mean 100 episode reward | 11.5         |\n",
            "| min 100 episode reward  | 1            |\n",
            "| pre_train               | False        |\n",
            "| steps                   | 105436       |\n",
            "------------------------------------------\n",
            " 12% 116421/1000000 [8:10:53<59:45:06,  4.11it/s]------------------------------------------\n",
            "| % time spent exploring  | 1            |\n",
            "| demo sample rate        | 0.447        |\n",
            "| elapsed time            | 08:22:05     |\n",
            "| episodes                | 70           |\n",
            "| epsilon                 | 0.01         |\n",
            "| loss_l2                 | 0.0039404957 |\n",
            "| loss_margin             | 0.0          |\n",
            "| loss_n_td               | 0.0031544697 |\n",
            "| loss_td                 | 0.0048100436 |\n",
            "| losses_all              | 0.005662076  |\n",
            "| max 100 episode reward  | 62           |\n",
            "| mean 100 episode reward | 11.1         |\n",
            "| min 100 episode reward  | 1            |\n",
            "| pre_train               | False        |\n",
            "| steps                   | 116421       |\n",
            "------------------------------------------\n",
            " 13% 131828/1000000 [9:13:43<58:25:29,  4.13it/s]------------------------------------------\n",
            "| % time spent exploring  | 1            |\n",
            "| demo sample rate        | 0.423        |\n",
            "| elapsed time            | 09:24:55     |\n",
            "| episodes                | 80           |\n",
            "| epsilon                 | 0.01         |\n",
            "| loss_l2                 | 0.003743165  |\n",
            "| loss_margin             | 0.0          |\n",
            "| loss_n_td               | 0.0006868905 |\n",
            "| loss_td                 | 0.024287283  |\n",
            "| losses_all              | 0.008074721  |\n",
            "| max 100 episode reward  | 62           |\n",
            "| mean 100 episode reward | 10.7         |\n",
            "| min 100 episode reward  | 1            |\n",
            "| pre_train               | False        |\n",
            "| steps                   | 131828       |\n",
            "------------------------------------------\n",
            " 14% 140298/1000000 [9:47:46<61:55:08,  3.86it/s]"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "0u3ghxyr50fh",
        "colab_type": "code",
        "outputId": "8d74b2fc-0aa6-4c82-e8e7-47f1973b7a5f",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 85
        }
      },
      "source": [
        "# Report the state of the cloned repository's working tree (uncommitted changes, branch sync).\n",
        "!git status"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "On branch master\n",
            "Your branch is up to date with 'origin/master'.\n",
            "\n",
            "nothing to commit, working tree clean\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "k1pjp4M7_2Uu",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        ""
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}